library(tidyverse)
library(Seurat)
Spapros is expecting scanpy format data with raw counts. Load based on established R workflow then convert. ### Read in GSE136831
original.dir <- ('~/vcalab-files/datasets/GSE136831_IPF_Cell_Atlas/')
metadata <- read_tsv(paste0(original.dir,'GSE136831_AllCells.Samples.CellType.MetadataTable.txt.gz'))
Rows: 312928 Columns: 9── Column specification ─────────────────────────────────────────────────────────────────────────────────────────
Delimiter: "\t"
chr (7): CellBarcode_Identity, CellType_Category, Manuscript_Identity, Subclass_Cell_Identity, Disease_Identi...
dbl (2): nUMI, nGene
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
metadata <- as.data.frame(metadata)
rownames(metadata) <- metadata$CellBarcode_Identity
features <- read_tsv(paste0(original.dir,'GSE136831_AllCells.GeneIDs.txt.gz'))
Rows: 45947 Columns: 2── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: "\t"
chr (2): Ensembl_GeneID, HGNC_EnsemblAlt_GeneID
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
cells <- read_tsv(paste0(original.dir,'GSE136831_AllCells.cellBarcodes.txt.gz'), col_names = F)
Rows: 312928 Columns: 1── Column specification ───────────────────────────────────────────────────────────────────────────────────────────────────
Delimiter: "\t"
chr (1): X1
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
counts <- Matrix::readMM(paste0(original.dir,'GSE136831_RawCounts_Sparse.mtx.gz'))
rownames(counts) <- features$HGNC_EnsemblAlt_GeneID
colnames(counts) <- cells$X1
ipfatlas.cells <- CreateSeuratObject(counts=counts, meta.data=metadata)
Warning: Data is of class dgTMatrix. Coercing to dgCMatrix.
rm(counts,cells,features,metadata)
gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 4287022 229.0 6682443 356.9 6682443 356.9
Vcells 1053056767 8034.2 3672764273 28021.0 4194266324 31999.8
ipfatlas.cells[['percent.mt']] <- PercentageFeatureSet(ipfatlas.cells, pattern='^MT-')
VlnPlot(ipfatlas.cells, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, pt.size = 0)
Warning: Default search for "data" layer in "RNA" assay yielded no results; utilizing "counts" layer instead.
ipfatlas.cells <- subset(ipfatlas.cells, subset = nFeature_RNA > 200 & nFeature_RNA < 5000 & percent.mt < 10)
This is remarkably not a solved problem but sceasy has functions for it. have to back-convert the Seurat5 object to a v3-like assay.
ipfatlas.cells[['RNA']] <- as(ipfatlas.cells[['RNA']],'Assay')
Warning: No layers found matching search pattern providedWarning: No layers found matching search pattern providedWarning: No layers found matching search pattern providedWarning: Layer ‘data’ is emptyWarning: No layers found matching search pattern providedWarning: No layers found matching search pattern providedWarning: Layer ‘scale.data’ is emptyWarning: Assay RNA changing from Assay5 to Assay
sceasy::convertFormat(ipfatlas.cells, from='seurat',to='anndata', outFile = 'ipfatlas.adata')
Warning: Dropping single category variables:orig.ident
AnnData object with n_obs × n_vars = 253424 × 45947
obs: 'nCount_RNA', 'nFeature_RNA', 'CellBarcode_Identity', 'nUMI', 'nGene', 'CellType_Category', 'Manuscript_Identity', 'Subclass_Cell_Identity', 'Disease_Identity', 'Subject_Identity', 'Library_Identity', 'percent.mt'
var: 'name'
Weird behavior when reticulate instantiated by other packages, therefore restart R.
.rs.restartR()
NULL
Using the SPAPROS workflow documented by theis lab which is python/scanpy based.
import pandas as pd
import scanpy as sc
import spapros as sp
sc.settings.verbosity=0
sc.logging.print_header()
scanpy==1.9.6 anndata==0.10.3 umap==0.5.5 numpy==1.26.3 scipy==1.11.4 pandas==1.5.3 scikit-learn==1.1.3 statsmodels==0.14.1 igraph==0.9.11 pynndescent==0.5.11
print(f"spapros=={sp.__version__}")
spapros==0.1.4
Access adata object on the python side. to avoid name conversions throughout python code, just make it “adata.” Unpredictable behavior when porting from R to python environment, so store as h5ad and read back in.
adata = sc.read_h5ad('ipfatlas.adata')
adata
AnnData object with n_obs × n_vars = 253424 × 45947
obs: 'nCount_RNA', 'nFeature_RNA', 'CellBarcode_Identity', 'nUMI', 'nGene', 'CellType_Category', 'Manuscript_Identity', 'Subclass_Cell_Identity', 'Disease_Identity', 'Subject_Identity', 'Library_Identity', 'percent.mt'
var: 'name'
Filtering has already been done on the Seurat side.
sc.pp.normalize_total(adata, target_sum=10000) # 10k target matches default Seurat scale factor
sc.pp.log1p(adata)
sc.pp.highly_variable_genes(adata,flavor="cell_ranger",n_top_genes=1000)
sc.pp.pca(adata, svd_solver='lobpcg') # had issues with arpack
/home/vincent/.local/lib/python3.9/site-packages/scipy/sparse/linalg/_eigen/_svds.py:487: UserWarning: Exited at iteration 20 with accuracies
[4.79425120e-08 4.33860979e-07 4.25535729e-08 1.19035951e-07
4.12742959e-08 1.13385877e-07 7.08756943e-07 1.37830810e-06
1.51369674e-07 5.47363570e-07 2.09337811e-06 3.11242987e-07
1.04559607e-06 8.92120768e-08 2.06874692e-07 1.38487152e-07
1.12949811e-06 7.70405333e-07 1.15730129e-06 2.44537505e-06
4.45034612e-07 1.09552891e-06 2.38279297e-07 1.70460495e-06
4.95124826e-07 4.02370540e-07 2.41765186e-06 6.03523887e-07
2.19781845e-06 1.21796480e-06 5.84655170e-07 4.03826669e-06
1.93481355e-06 2.18713447e-06 2.79229665e-06 6.98544531e-06
1.46472059e-06 4.01444386e-06 3.26535748e-06 1.65977582e-05
1.99434152e-05 1.90039855e-04 2.84961516e-04 8.43750002e-04
5.89064658e-03 9.32999730e-03 8.68964326e-03 4.86836385e-02
1.07320979e-01 1.69766674e+01]
not reaching the requested tolerance 1.4901161193847656e-05.
Use iteration 21 instead with accuracy
0.3431597728816552.
_, eigvec = lobpcg(XH_X, X, tol=tol ** 2, maxiter=maxiter,
/home/vincent/.local/lib/python3.9/site-packages/scipy/sparse/linalg/_eigen/_svds.py:487: UserWarning: Exited postprocessing with accuracies
[4.81829694e-08 4.33156171e-07 4.15085166e-08 1.18443928e-07
4.10329817e-08 1.13370684e-07 7.08724601e-07 1.37826510e-06
1.51156757e-07 5.47290646e-07 2.09345190e-06 3.11020503e-07
1.04560254e-06 8.90149173e-08 2.06879686e-07 1.38642587e-07
1.12966102e-06 7.70430839e-07 1.15727319e-06 2.44537198e-06
4.45026163e-07 1.09566625e-06 2.38218842e-07 1.70461422e-06
4.95173291e-07 4.02321463e-07 2.41768558e-06 6.03567884e-07
2.19774360e-06 1.21802253e-06 5.84547692e-07 4.03831660e-06
1.93482576e-06 2.18726418e-06 2.79223102e-06 6.98547527e-06
1.46476658e-06 4.01439911e-06 3.26532234e-06 1.65977890e-05
1.99434840e-05 1.90039834e-04 2.84961507e-04 8.43750076e-04
5.89064660e-03 9.32999723e-03 8.68964326e-03 4.86836385e-02
1.07320979e-01 1.69766674e+01]
not reaching the requested tolerance 1.4901161193847656e-05.
_, eigvec = lobpcg(XH_X, X, tol=tol ** 2, maxiter=maxiter,
sc.pp.neighbors(adata, n_neighbors=10, n_pcs=30)
sc.tl.umap(adata)
sc.pl.umap(adata, color=['CellType_Category'])
/home/vincent/.local/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
cax = scatter(
sc.pl.umap(adata, color=['Disease_Identity'])
/home/vincent/.local/lib/python3.9/site-packages/scanpy/plotting/_tools/scatterplots.py:394: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored
cax = scatter(
Asked for 200 genes, because one imagines that various stakeholders will add another 100 for IPF subsets, and probably asthma stakeholders will add another 100 bringing total to 400. Note that this dataset includes normal lung and COPD lung.
selector = sp.se.ProbesetSelector(adata, n=200, celltype_key="Manuscript_Identity", verbosity=0, save_dir=None)
Note: The following celltypes' test set sizes for forest training are below min_test_n (=20):
Ionocyte : 6
PNEC : 12
The genes selected for those cell types potentially don't generalize well. Find the genes for each of those cell types in self.genes_of_primary_trees after running self.select_probeset().
selector.select_probeset()
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/joblib/externals/loky/process_executor.py:752: UserWarning: A worker stopped while some jobs were given to the executor. This can be caused by a too short worker timeout or by a memory leak.
warnings.warn(
selector.probeset[selector.probeset.selection].to_csv('SPAPROS-basic200.csv')
What is the distribution of chosen probes among methods?
selector.plot_gene_overlap()
/home/vincent/.local/lib/python3.9/site-packages/spapros/plotting/plot.py:1241: UserWarning: This figure includes Axes that are not compatible with tight_layout, so results might be incorrect.
plt.tight_layout()
I don’t understand what the 4th column is, 33 genes that have no attribution for the method of selection?
# list probes
selector.probeset.index[selector.probeset.selection]
Index(['TPSB2', 'CCL21', 'GRP', 'EMP2', 'HYDIN', 'MIR205HG', 'TPM2', 'ZNF385D',
'FCN3', 'S100B',
...
'COL6A2', 'CXCL5', 'TIMP3', 'AGBL4', 'ADAM19', 'BATF', 'LIMCH1',
'SKAP1', 'CTSW', 'ABLIM1'],
dtype='object', length=200)
Table of characteristics for selected probes
probe_candidates = selector.probeset[selector.probeset.selection]
probe_candidates
gene_nr selection ... required_marker required_list_marker
TPSB2 1 True ... True False
CCL21 2 True ... True False
GRP 3 True ... True False
EMP2 4 True ... True False
HYDIN 5 True ... True False
... ... ... ... ... ...
BATF 196 True ... False False
LIMCH1 197 True ... False False
SKAP1 198 True ... False False
CTSW 199 True ... False False
ABLIM1 200 True ... False False
[200 rows x 17 columns]
probes = probe_candidates[(probe_candidates['celltypes_DE_1vsall']!='')]
probes = probes.sort_values('celltypes_DE')
sc.pl.dotplot(adata, probes.index[0:40], groupby='Manuscript_Identity', dendrogram=False)
/home/vincent/.local/lib/python3.9/site-packages/scanpy/plotting/_dotplot.py:747: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
dot_ax.scatter(x, y, **kwds)
sc.pl.dotplot(adata, probes.index[40::], groupby='Manuscript_Identity', dendrogram=False)
/home/vincent/.local/lib/python3.9/site-packages/scanpy/plotting/_dotplot.py:747: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
dot_ax.scatter(x, y, **kwds)
probes = probe_candidates[(probe_candidates['celltypes_DE_specific']!='')]
probes = probes.sort_values('celltypes_DE')
sc.pl.dotplot(adata, probes.index[0:25], groupby='Manuscript_Identity', dendrogram=False)
/home/vincent/.local/lib/python3.9/site-packages/scanpy/plotting/_dotplot.py:747: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
dot_ax.scatter(x, y, **kwds)
sc.pl.dotplot(adata, probes.index[25::], groupby='Manuscript_Identity', dendrogram=False)
/home/vincent/.local/lib/python3.9/site-packages/scanpy/plotting/_dotplot.py:747: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
dot_ax.scatter(x, y, **kwds)
probes = probe_candidates[(probe_candidates['pca_selected'])]
probes = probes.sort_values('celltypes_DE')
sc.pl.dotplot(adata, probes.index[0:33], groupby='Manuscript_Identity', dendrogram=False)
/home/vincent/.local/lib/python3.9/site-packages/scanpy/plotting/_dotplot.py:747: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
dot_ax.scatter(x, y, **kwds)
sc.pl.dotplot(adata, probes.index[33:66], groupby='Manuscript_Identity', dendrogram=False)
/home/vincent/.local/lib/python3.9/site-packages/scanpy/plotting/_dotplot.py:747: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
dot_ax.scatter(x, y, **kwds)
sc.pl.dotplot(adata, probes.index[66::], groupby='Manuscript_Identity', dendrogram=False)
/home/vincent/.local/lib/python3.9/site-packages/scanpy/plotting/_dotplot.py:747: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
dot_ax.scatter(x, y, **kwds)
We already have some xenium runs where some probesets have reasonable average detection and show up as highly-variable genes in clustering of the Xenium data. Therefore I regard these genes as presumptively high-performance informative genes. Including these in the diversity panel will mitigate the risk of selecting genes whose probesets do not perform well for technical reasons. Therefore ask SPAPROS to include these genes and then modify its other selections accordingly.
length(probesets_selected)
[1] 72
preselected_genes=r.probesets_selected
# Try to account for selected genes that are in PCA genes.
tmp = sp.se.select_pca_genes(adata, n=adata.n_vars, inplace=False)["selection_ranking"]
n_pca_genes = tmp.loc[~tmp.index.isin(preselected_genes)].sort_values().iloc[:20].max().astype(int)
For PCA ranking genes, there are only 2 (S100A8 and AREG) that are in the top 100 PCA genes. So I think we are reasonably safe to just add more PCA genes on top of these.
selector = sp.se.ProbesetSelector(adata, n=200, celltype_key="Manuscript_Identity", verbosity=0, save_dir=None, preselected_genes=preselected_genes)
Note: The following celltypes' test set sizes for forest training are below min_test_n (=20):
Ionocyte : 6
PNEC : 12
The genes selected for those cell types potentially don't generalize well. Find the genes for each of those cell types in self.genes_of_primary_trees after running self.select_probeset().
selector.select_probeset()
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'logfoldchanges'] = np.log2(
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:396: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'names'] = self.var_names[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:398: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'scores'] = scores[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:401: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals'] = pvals[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:411: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'pvals_adj'] = pvals_adj[global_indices]
/home/vincent/.local/lib/python3.9/site-packages/scanpy/tools/_rank_genes_groups.py:422: PerformanceWarning: DataFrame is highly fragmented. This is usually the result of calling `frame.insert` many times, which has poor performance. Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`
self.stats[group_name, 'logfoldchanges'] = np.log2(
selector.probeset[selector.probeset.selection].to_csv('SPAPROS-200withXeniumCuration.csv')
What is the distribution of chosen probes among methods?
selector.plot_gene_overlap()
/home/vincent/.local/lib/python3.9/site-packages/spapros/plotting/plot.py:1241: UserWarning: This figure includes Axes that are not compatible with tight_layout, so results might be incorrect.
plt.tight_layout()
# list probes
selector.probeset.index[selector.probeset.selection]
Index(['IL1RL1', 'S100A8', 'VWF', 'IRF4', 'AREG', 'KRT7', 'KIT', 'KRT15',
'PLA2G2A', 'SCEL',
...
'PPP1R16B', 'BAG3', 'HSPA1A', 'HSPA1B', 'ENSG00000276085', 'CCL20',
'MT1G', 'STAT4', 'CCL18', 'CCL2'],
dtype='object', length=200)
Table of characteristics for selected probes
probe_candidates = selector.probeset[selector.probeset.selection]
probe_candidates
gene_nr selection ... required_marker required_list_marker
IL1RL1 1 True ... True False
S100A8 2 True ... True False
VWF 3 True ... True False
IRF4 4 True ... True False
AREG 5 True ... True False
... ... ... ... ... ...
CCL20 196 True ... False False
MT1G 197 True ... False False
STAT4 198 True ... False False
CCL18 199 True ... False False
CCL2 200 True ... False False
[200 rows x 17 columns]
probes = probe_candidates[(probe_candidates['celltypes_DE_1vsall']!='')]
probes = probes.sort_values('celltypes_DE')
sc.pl.dotplot(adata, probes.index[0:40], groupby='Manuscript_Identity', dendrogram=False)
/home/vincent/.local/lib/python3.9/site-packages/scanpy/plotting/_dotplot.py:747: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
dot_ax.scatter(x, y, **kwds)
sc.pl.dotplot(adata, probes.index[40::], groupby='Manuscript_Identity', dendrogram=False)
/home/vincent/.local/lib/python3.9/site-packages/scanpy/plotting/_dotplot.py:747: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
dot_ax.scatter(x, y, **kwds)
probes = probe_candidates[(probe_candidates['celltypes_DE_specific']!='')]
probes = probes.sort_values('celltypes_DE')
sc.pl.dotplot(adata, probes.index[0:25], groupby='Manuscript_Identity', dendrogram=False)
/home/vincent/.local/lib/python3.9/site-packages/scanpy/plotting/_dotplot.py:747: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
dot_ax.scatter(x, y, **kwds)
sc.pl.dotplot(adata, probes.index[25::], groupby='Manuscript_Identity', dendrogram=False)
/home/vincent/.local/lib/python3.9/site-packages/scanpy/plotting/_dotplot.py:747: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
dot_ax.scatter(x, y, **kwds)
probes = probe_candidates[(probe_candidates['pca_selected'])]
probes = probes.sort_values('celltypes_DE')
sc.pl.dotplot(adata, probes.index[0:33], groupby='Manuscript_Identity', dendrogram=False)
/home/vincent/.local/lib/python3.9/site-packages/scanpy/plotting/_dotplot.py:747: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
dot_ax.scatter(x, y, **kwds)
sc.pl.dotplot(adata, probes.index[33:66], groupby='Manuscript_Identity', dendrogram=False)
/home/vincent/.local/lib/python3.9/site-packages/scanpy/plotting/_dotplot.py:747: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
dot_ax.scatter(x, y, **kwds)
sc.pl.dotplot(adata, probes.index[66::], groupby='Manuscript_Identity', dendrogram=False)
/home/vincent/.local/lib/python3.9/site-packages/scanpy/plotting/_dotplot.py:747: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap', 'norm' will be ignored
dot_ax.scatter(x, y, **kwds)
Penalties are 0 to 1, where 0 is bad and 1 is best (i.e., no penalty). Behavior of penalties seems to depend on the the source. If identified by the package, it’s a multiplier. If the source is from a manually curated list, all genes with any penalty is removed.
We were told by 10x that SCGB1A1 and SFTPC had too high abundance to design probes. COL1A1 is the highest single detected gene in trial runs; I am not aware of any codeword budget issues with COL1A1.
sc.pl.violin(adata, ['SCGB1A1', 'SFTPC', 'COL1A1'])
sc.pl.violin(adata, ['GDF15', 'CLDN4', 'KRT8'])
If we wanted to be fairly aggressive, a threshold of like 1 to 6 would get us genes that are like COL1A1 or even more highly expressed. Some high performance genes like KRT8 wouldn’t necessarily be selected.
# Set thresholds
lower_th = 1.0
upper_th = 6
FACTOR = 0.1
# Calculate quantiles
sp.ut.get_expression_quantile(adata, q=0.99, normalise=False, log1p=False, zeros_to_nan=False)
sp.ut.get_expression_quantile(adata, q=0.9, normalise=False, log1p=False, zeros_to_nan=True)
# Get penalty functions for given factor
penalty_fcts={}
penalty_fcts[f"lower_{FACTOR}"] = sp.ut.plateau_penalty_kernel(var=0.1 * FACTOR, x_min=lower_th, x_max=None)
penalty_fcts[f"upper_{FACTOR}"] = sp.ut.plateau_penalty_kernel(var=0.5*FACTOR, x_min=None, x_max=upper_th)
# Calculate each gene's penalty value
adata.var[f"expr_penalty_lower_{FACTOR}"] = penalty_fcts[f"lower_{FACTOR}"](adata.var['quantile_0.9 expr > 0'])
adata.var[f"expr_penalty_upper_{FACTOR}"] = penalty_fcts[f"upper_{FACTOR}"](adata.var['quantile_0.99'])
# PCA and DE selections with penalties
penalty_keys = [f"expr_penalty_lower_{FACTOR}",f"expr_penalty_upper_{FACTOR}"]
adata.var["expr_penalty_lower"] = adata.var[f"expr_penalty_lower_{FACTOR}"]
adata.var["expr_penalty_upper"] = adata.var[f"expr_penalty_upper_{FACTOR}"]
Run the selection. In this run we have not seeded any specific number of genes, curious what it comes up with.
# create an instance of the ProbesetSelector class
selector_highexpression = sp.se.ProbesetSelector(
adata,
n=None,
celltype_key="Manuscript_Identity",
verbosity=1,
save_dir=None,
pca_penalties=["expr_penalty_lower", "expr_penalty_upper"],
DE_penalties=["expr_penalty_lower", "expr_penalty_upper"],
m_penalties_adata_celltypes=["expr_penalty_lower", "expr_penalty_upper"],
m_penalties_list_celltypes=["expr_penalty_upper"],
)
selector_highexpression.select_probeset()
selector_highexpression.probeset[selector_highexpression.probeset.selection].to_csv('SPAPROS-highexpression.csv')
What is the distribution of chosen probes among methods?
selector_highexpression.plot_gene_overlap()
# list probes
selector_highexpression.probeset.index[selector_highexpression.probeset.selection]
Table of characteristics for selected probes
probe_candidates = selector_highexpression.probeset[selector_highexpression.probeset.selection]
probe_candidates
probes = probe_candidates[(probe_candidates['celltypes_DE_1vsall']!='')]
probes = probes.sort_values('celltypes_DE')
sc.pl.dotplot(adata, probes.index[0:40], groupby='Manuscript_Identity', dendrogram=False)
sc.pl.dotplot(adata, probes.index[40::], groupby='Manuscript_Identity', dendrogram=False)
probes = probe_candidates[(probe_candidates['celltypes_DE_specific']!='')]
probes = probes.sort_values('celltypes_DE')
sc.pl.dotplot(adata, probes.index[0:15], groupby='Manuscript_Identity', dendrogram=False)
sc.pl.dotplot(adata, probes.index[15::], groupby='Manuscript_Identity', dendrogram=False)
probes = probe_candidates[(probe_candidates['pca_selected'])]
probes = probes.sort_values('celltypes_DE')
sc.pl.dotplot(adata, probes.index[0:33], groupby='Manuscript_Identity', dendrogram=False)
sc.pl.dotplot(adata, probes.index[33:66], groupby='Manuscript_Identity', dendrogram=False)
sc.pl.dotplot(adata, probes.index[66::], groupby='Manuscript_Identity', dendrogram=False)
How well are probes correlated with each other?
Set up evaluation.
# I think there is at least one bug in the code that expects "celltype" as an element.
adata.obs['celltype']=adata.obs['Manuscript_Identity']
# instantiate evaluator
evaluator = sp.ev.ProbesetEvaluator(adata, celltype_key='Manuscript_Identity', metrics={'cluster_similarity','knn_overlap','forest_clfs','marker_corr','gene_corr'}, verbosity=2, results_dir=None)
xenium_panel1_genes = pd.read_csv('Xenium_panel_order_08_10_23.csv')['Gene']
evaluator.evaluate_probeset(xenium_panel1_genes, set_id="Xenium Lung Trial")
evaluator.evaluate_probeset(selector.probeset.index[selector.probeset.selection], set_id="SPAPROS basic")
evaluator.evaluate_probeset(selector_highexpression.probeset.index[selector_highexpression.probeset.selection].tolist(), set_id="SPAPROS high expression minimal")
evaluator.summary_statistics()
evaluator.plot_summary()
# vignette uses a different method, this is the one in the wrapper functions listing
evaluator.plot_marker_corr()